{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2018-06-07 09:47:02--  ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2\n",
      "           => ‘hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2’\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.11\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.11|:21... connected.\n",
      "Logging in as anonymous ... Logged in!\n",
      "==> SYST ... done.    ==> PWD ... done.\n",
      "==> TYPE I ... done.  ==> CWD (1) /hapmap/genotypes/hapmap3/plink_format/draft_2 ... done.\n",
      "==> SIZE hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2 ... 10590722\n",
      "==> PASV ... done.    ==> RETR hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2 ... done.\n",
      "Length: 10590722 (10M) (unauthoritative)\n",
      "\n",
      "hapmap3_r2_b36_fwd. 100%[===================>]  10.10M  3.45MB/s    in 2.9s    \n",
      "\n",
      "2018-06-07 09:47:06 (3.45 MB/s) - ‘hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2’ saved [10590722]\n",
      "\n",
      "--2018-06-07 09:47:06--  ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2\n",
      "           => ‘hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2’\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.11\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.11|:21... connected.\n",
      "Logging in as anonymous ... Logged in!\n",
      "==> SYST ... done.    ==> PWD ... done.\n",
      "==> TYPE I ... done.  ==> CWD (1) /hapmap/genotypes/hapmap3/plink_format/draft_2 ... done.\n",
      "==> SIZE hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2 ... 757962593\n",
      "==> PASV ... done.    ==> RETR hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2 ... done.\n",
      "Length: 757962593 (723M) (unauthoritative)\n",
      "\n",
      "hapmap3_r2_b36_fwd. 100%[===================>] 722.85M  3.78MB/s    in 2m 23s  \n",
      "\n",
      "2018-06-07 09:49:31 (5.06 MB/s) - ‘hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2’ saved [757962593]\n",
      "\n",
      "--2018-06-07 09:49:31--  ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/relationships_w_pops_121708.txt\n",
      "           => ‘relationships_w_pops_121708.txt’\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:21... connected.\n",
      "Logging in as anonymous ... Logged in!\n",
      "==> SYST ... done.    ==> PWD ... done.\n",
      "==> TYPE I ... done.  ==> CWD (1) /hapmap/genotypes/hapmap3/plink_format/draft_2 ... done.\n",
      "==> SIZE relationships_w_pops_121708.txt ... 36765\n",
      "==> PASV ... done.    ==> RETR relationships_w_pops_121708.txt ... done.\n",
      "Length: 36765 (36K) (unauthoritative)\n",
      "\n",
      "relationships_w_pop 100%[===================>]  35.90K  --.-KB/s    in 0.1s    \n",
      "\n",
      "2018-06-07 09:49:32 (293 KB/s) - ‘relationships_w_pops_121708.txt’ saved [36765]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#XXX changes\n",
    "!wget ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2\n",
    "!wget ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2\n",
    "\n",
    "!wget ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/hapmap3/plink_format/draft_2/relationships_w_pops_121708.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!bunzip2 hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2\n",
    "!bunzip2 hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading HapMap meta-data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "f = open('relationships_w_pops_121708.txt')\n",
    "pop_ind = defaultdict(list)\n",
    "f.readline()  # header\n",
    "offspring = []\n",
    "for l in f:\n",
    "    toks = l.rstrip().split('\\t')\n",
    "    fam_id = toks[0]\n",
    "    ind_id = toks[1]\n",
    "    mom = toks[2]\n",
    "    dad = toks[3]\n",
    "    if mom != '0' or dad != '0':\n",
    "        offspring.append((fam_id, ind_id))\n",
    "    pop = toks[-1]\n",
    "    pop_ind[pop].append((fam_id, ind_id))\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sub-sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.system('plink --recode --file hapmap3_r2_b36_fwd.consensus.qc.poly --noweb --out hapmap10 --thin 0.1 --geno 0.1')\n",
    "os.system('plink --recode --file hapmap3_r2_b36_fwd.consensus.qc.poly --noweb --out hapmap1 --thin 0.01 --geno 0.1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting only autosomal data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_non_auto_SNPs(map_file, exclude_file):\n",
    "    f = open(map_file)\n",
    "    w = open(exclude_file, 'w')\n",
    "    for l in f:\n",
    "        toks = l.rstrip().split('\\t')\n",
    "        chrom = int(toks[0])\n",
    "        rs = toks[1]\n",
    "        if chrom > 22:\n",
    "            w.write('%s\\n' % rs)\n",
    "    w.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "get_non_auto_SNPs('hapmap10.map', 'exclude10.txt')\n",
    "get_non_auto_SNPs('hapmap1.map', 'exclude1.txt')\n",
    "#get_non_auto_SNPs('hapmap3_r2_b36_fwd.consensus.qc.poly.map', 'exclude.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap10_auto.log.\n",
      "Options in effect:\n",
      "  --exclude exclude10.txt\n",
      "  --file hapmap10\n",
      "  --noweb\n",
      "  --out hapmap10_auto\n",
      "  --recode\n",
      "\n",
      "Note: --noweb has no effect since no web check is implemented yet.\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (143993 variants, 1184 people).\n",
      "--file: hapmap10_auto-temporary.bed + hapmap10_auto-temporary.bim +\n",
      "hapmap10_auto-temporary.fam written.\n",
      "143993 variants loaded from .bim file.\n",
      "1184 people (589 males, 595 females) loaded from .fam.\n",
      "--exclude: 138760 variants remaining.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 196 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.99773.\n",
      "138760 variants and 1184 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap10_auto.ped + hapmap10_auto.map ... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n",
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap1_auto.log.\n",
      "Options in effect:\n",
      "  --exclude exclude1.txt\n",
      "  --file hapmap1\n",
      "  --noweb\n",
      "  --out hapmap1_auto\n",
      "  --recode\n",
      "\n",
      "Note: --noweb has no effect since no web check is implemented yet.\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (14422 variants, 1184 people).\n",
      "--file: hapmap1_auto-temporary.bed + hapmap1_auto-temporary.bim +\n",
      "hapmap1_auto-temporary.fam written.\n",
      "14422 variants loaded from .bim file.\n",
      "1184 people (589 males, 595 females) loaded from .fam.\n",
      "--exclude: 13892 variants remaining.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 196 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.997728.\n",
      "13892 variants and 1184 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap1_auto.ped + hapmap1_auto.map ... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n"
     ]
    }
   ],
   "source": [
    "!plink --recode --file hapmap10 --noweb --out hapmap10_auto --exclude exclude10.txt\n",
    "!plink --recode --file hapmap1 --noweb --out hapmap1_auto --exclude exclude1.txt\n",
    "#geno!!!\n",
    "#!plink --recode --file hapmap3_r2_b36_fwd.consensus.qc.poly --noweb --out hapmap_auto --exclude exclude.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Removing offspring"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap10_auto_noofs.log.\n",
      "Options in effect:\n",
      "  --file hapmap10_auto\n",
      "  --filter-founders\n",
      "  --out hapmap10_auto_noofs\n",
      "  --recode\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (138760 variants, 1184 people).\n",
      "--file: hapmap10_auto_noofs-temporary.bed + hapmap10_auto_noofs-temporary.bim +\n",
      "hapmap10_auto_noofs-temporary.fam written.\n",
      "138760 variants loaded from .bim file.\n",
      "1184 people (589 males, 595 females) loaded from .fam.\n",
      "196 people removed due to founder status (--filter-founders).\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate in remaining samples is 0.99772.\n",
      "138760 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap10_auto_noofs.ped + hapmap10_auto_noofs.map ... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n"
     ]
    }
   ],
   "source": [
    "!plink --file hapmap10_auto --filter-founders --recode --out hapmap10_auto_noofs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LD-prunning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to keep.log.\n",
      "Options in effect:\n",
      "  --file hapmap10_auto_noofs\n",
      "  --indep-pairwise 50 10 0.1\n",
      "  --out keep\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (138760 variants, 988 people).\n",
      "--file: keep-temporary.bed + keep-temporary.bim + keep-temporary.fam written.\n",
      "138760 variants loaded from .bim file.\n",
      "988 people (488 males, 500 females) loaded from .fam.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.99772.\n",
      "138760 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "Pruned 7057 variants from chromosome 1, leaving 4547.\n",
      "Pruned 7330 variants from chromosome 2, leaving 4300.\n",
      "Pruned 5857 variants from chromosome 3, leaving 3643.\n",
      "Pruned 5298 variants from chromosome 4, leaving 3372.\n",
      "Pruned 5489 variants from chromosome 5, leaving 3449.\n",
      "Pruned 5659 variants from chromosome 6, leaving 3389.\n",
      "Pruned 4656 variants from chromosome 7, leaving 3023.\n",
      "Pruned 4695 variants from chromosome 8, leaving 2824.\n",
      "Pruned 3796 variants from chromosome 9, leaving 2549.\n",
      "Pruned 4485 variants from chromosome 10, leaving 2905.\n",
      "Pruned 4271 variants from chromosome 11, leaving 2670.\n",
      "Pruned 3963 variants from chromosome 12, leaving 2854.\n",
      "Pruned 3078 variants from chromosome 13, leaving 2089.\n",
      "Pruned 2648 variants from chromosome 14, leaving 1916.\n",
      "Pruned 2505 variants from chromosome 15, leaving 1760.\n",
      "Pruned 2572 variants from chromosome 16, leaving 1945.\n",
      "Pruned 2058 variants from chromosome 17, leaving 1741.\n",
      "Pruned 2357 variants from chromosome 18, leaving 1748.\n",
      "Pruned 1370 variants from chromosome 19, leaving 1323.\n",
      "Pruned 2091 variants from chromosome 20, leaving 1545.\n",
      "Pruned 1123 variants from chromosome 21, leaving 811.\n",
      "Pruned 1057 variants from chromosome 22, leaving 942.\n",
      "Pruning complete.  83415 of 138760 variants removed.\n",
      "Marker lists written to keep.prune.in and keep.prune.out .\n",
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap10_auto_noofs_ld.log.\n",
      "Options in effect:\n",
      "  --extract keep.prune.in\n",
      "  --file hapmap10_auto_noofs\n",
      "  --out hapmap10_auto_noofs_ld\n",
      "  --recode\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (138760 variants, 988 people).\n",
      "--file: hapmap10_auto_noofs_ld-temporary.bed +\n",
      "hapmap10_auto_noofs_ld-temporary.bim + hapmap10_auto_noofs_ld-temporary.fam\n",
      "written.\n",
      "138760 variants loaded from .bim file.\n",
      "988 people (488 males, 500 females) loaded from .fam.\n",
      "--extract: 55345 variants remaining.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.997666.\n",
      "55345 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap10_auto_noofs_ld.ped + hapmap10_auto_noofs_ld.map ...\n",
      "101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n"
     ]
    }
   ],
   "source": [
    "!plink --file hapmap10_auto_noofs --indep-pairwise 50 10 0.1 --out keep\n",
    "!plink --file hapmap10_auto_noofs --extract keep.prune.in --recode --out hapmap10_auto_noofs_ld"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Different coding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Note: --recode12 flag deprecated.  Use 'recode 12 ...'.\n",
      "Logging to hapmap10_auto_noofs_ld_12.log.\n",
      "Options in effect:\n",
      "  --file hapmap10_auto_noofs_ld\n",
      "  --out hapmap10_auto_noofs_ld_12\n",
      "  --recode 12 tab\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (55345 variants, 988 people).\n",
      "--file: hapmap10_auto_noofs_ld_12-temporary.bed +\n",
      "hapmap10_auto_noofs_ld_12-temporary.bim +\n",
      "hapmap10_auto_noofs_ld_12-temporary.fam written.\n",
      "55345 variants loaded from .bim file.\n",
      "988 people (488 males, 500 females) loaded from .fam.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.997666.\n",
      "55345 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap10_auto_noofs_ld_12.ped + hapmap10_auto_noofs_ld_12.map\n",
      "... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n",
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap10_auto_noofs_ld.log.\n",
      "Options in effect:\n",
      "  --file hapmap10_auto_noofs_ld\n",
      "  --make-bed\n",
      "  --out hapmap10_auto_noofs_ld\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (55345 variants, 988 people).\n",
      "--file: hapmap10_auto_noofs_ld-temporary.bed +\n",
      "hapmap10_auto_noofs_ld-temporary.bim + hapmap10_auto_noofs_ld-temporary.fam\n",
      "written.\n",
      "55345 variants loaded from .bim file.\n",
      "988 people (488 males, 500 females) loaded from .fam.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.997666.\n",
      "55345 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--make-bed to hapmap10_auto_noofs_ld.bed + hapmap10_auto_noofs_ld.bim +\n",
      "hapmap10_auto_noofs_ld.fam ... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n"
     ]
    }
   ],
   "source": [
    "!plink --file hapmap10_auto_noofs_ld --recode12 tab --out hapmap10_auto_noofs_ld_12\n",
    "!plink --make-bed --file hapmap10_auto_noofs_ld --out hapmap10_auto_noofs_ld"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Single chromosome"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PLINK v1.90b4 64-bit (20 Mar 2017)             www.cog-genomics.org/plink/1.9/\n",
      "(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3\n",
      "Logging to hapmap10_auto_noofs_2.log.\n",
      "Options in effect:\n",
      "  --chr 2\n",
      "  --file hapmap10_auto_noofs\n",
      "  --out hapmap10_auto_noofs_2\n",
      "  --recode\n",
      "\n",
      "7879 MB RAM detected; reserving 3939 MB for main workspace.\n",
      ".ped scan complete (for binary autoconversion).\n",
      "Performing single-pass .bed write (11630 variants, 988 people).\n",
      "--file: hapmap10_auto_noofs_2-temporary.bed +\n",
      "hapmap10_auto_noofs_2-temporary.bim + hapmap10_auto_noofs_2-temporary.fam\n",
      "written.\n",
      "11630 variants loaded from .bim file.\n",
      "988 people (488 males, 500 females) loaded from .fam.\n",
      "Using 1 thread (no multithreaded calculations invoked).\n",
      "Before main variant filters, 988 founders and 0 nonfounders present.\n",
      "Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.\n",
      "Total genotyping rate is 0.997713.\n",
      "11630 variants and 988 people pass filters and QC.\n",
      "Note: No phenotypes present.\n",
      "--recode ped to hapmap10_auto_noofs_2.ped + hapmap10_auto_noofs_2.map ... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899done.\n"
     ]
    }
   ],
   "source": [
    "!plink --recode --file hapmap10_auto_noofs --chr 2 --out hapmap10_auto_noofs_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
